/*-
 * Copyright 2015 Toomas Soome <tsoome@me.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * relocate is needed to support loading code which has to be located
 * below 1MB, as both BTX and loader are using low memory area.
 *
 * relocate and start loaded code. Since loaded code may need to be
 * placed to already occupied memory area, this code is moved to safe
 * memory area and then btx __exec will be called with physical pointer
 * to this area. __exec will set pointer to %eax and use call *%eax,
 * so on entry, we have new "base" address in %eax.
 *
 * Relocate will first set up and load a new safe GDT to shut down BTX,
 * then the loaded code will be relocated to its final memory location,
 * then the machine will be switched from 32bit protected mode to 16bit
 * protected mode, followed by a switch to real mode with A20 enabled or
 * disabled. Finally the loaded code will be started and it will take
 * over the whole system.
 *
 * For now, the known "safe" memory area for relocate is 0x600;
 * the actual "free" memory is supposed to start from 0x500, leaving
 * the first 0x100 bytes in reserve. As the relocate code+data is very
 * small, it leaves enough space to set up boot blocks at 0:7c00 or to
 * load a Linux kernel below 1MB.
 */
/*
 * Segment selectors. Each value is the index of the matching descriptor
 * in the gdt table below, shifted left by 3 (table indicator and RPL
 * bits are zero).
 */
		.equ SEL_SCODE, 1 << 3		/* 0x08: 32-bit code */
		.equ SEL_SDATA, 2 << 3		/* 0x10: 32-bit data */
		.equ SEL_RCODE, 3 << 3		/* 0x18: 16-bit code */
		.equ SEL_RDATA, 4 << 3		/* 0x20: 16-bit data */

		.p2align	4
		.globl relocater
/*
 * relocater: entry point of the trampoline.
 *
 * In:  %eax = address this code is running from (the new "base").
 * The base is kept in %esi, and every local symbol is addressed as
 * (symbol - relocater)(%esi), so the code works from wherever it was
 * copied to.
 */
relocater:
		cli
		/*
		 * set up GDT from new location
		 */
		movl	%eax, %esi		/* our base address */
		/* absolute jump to relocater.1 computed from the base */
		add	$(relocater.1-relocater), %eax
		jmp	*%eax
relocater.1:
		/* set up jump */
		/*
		 * Store the run-time address of relocater.2 into the offset
		 * half of jump_vector; its selector half is assembled as
		 * SEL_SCODE.
		 */
		lea	(relocater.2-relocater)(%esi), %eax
		movl	%eax, (jump_vector-relocater) (%esi)

		/* set up gdt */
		/* gdtaddr (the base field of gdtdesc) = run-time address of gdt */
		lea	(gdt-relocater) (%esi), %eax
		movl	%eax, (gdtaddr-relocater) (%esi)

		/* load gdt */
		lgdt	(gdtdesc - relocater) (%esi)
		/* idt describes limit 0x3ff at base 0 */
		lidt	(idt-relocater) (%esi)

		/* update cs */
		/* far jump through jump_vector: %cs = SEL_SCODE, %eip = relocater.2 */
		ljmp *(jump_vector-relocater) (%esi)
		.code32
/*
 * relocater.2: running on the new GDT with %cs = SEL_SCODE.
 * Reload all data segment registers with SEL_SDATA, turn paging off
 * and clear %cr3. %esi still holds the trampoline base.
 */
relocater.2:
		xorl	%eax, %eax
		movb	$SEL_SDATA, %al		/* %eax = SEL_SDATA */
		movw	%ax, %ss
		movw	%ax, %ds
		movw	%ax, %es
		movw	%ax, %fs
		movw	%ax, %gs
		movl	%cr0, %eax		/* disable paging */
		andl	$~0x80000000,%eax	/* clear CR0 bit 31 (PG) */
		movl	%eax, %cr0
		xorl	%ecx, %ecx		/* flush TLB */
		movl	%ecx, %cr3
		cld				/* string copies below run upwards */
/*
 * Relocation loop.
 *
 * relocater_data is an array of { src, dest, size } dword triples;
 * a triple whose src is 0 ends the list. Each triple is copied with
 * rep movsb.
 *
 * Register use:
 *   %ebx  trampoline base (copy of %esi, which the string move consumes)
 *   %edx  dword index of the current triple in relocater_data
 *   %eax  src of the current triple (0 on exit)
 *   %esi, %edi, %ecx  source, destination and byte count for the move
 */
		movl	%esi, %ebx		/* keep the base safe */
		xorl	%edx, %edx		/* start at the first triple */
loop.1:
		movl	(relocater_data-relocater)(%ebx, %edx, 4), %eax
		testl	%eax, %eax		/* src == 0: end of list */
		jz	loop.2
		movl	%eax, %esi		/* src */
		movl	(relocater_data-relocater+4)(%ebx, %edx, 4), %edi /* dest */
		movl	(relocater_data-relocater+8)(%ebx, %edx, 4), %ecx /* size */
		addl	$3, %edx		/* advance to the next triple */
		rep
		movsb
		jmp	loop.1
loop.2:
		movl	%ebx, %esi		/* %esi = base again */
		/*
		 * data is relocated, switch to 16bit mode
		 */
		/* jump_vector = SEL_RCODE : run-time address of relocater.3 */
		lea	(relocater.3-relocater)(%esi), %eax
		movl	%eax, (jump_vector-relocater) (%esi)
		movl	$SEL_RCODE, %eax
		movl	%eax, (jump_vector-relocater+4) (%esi)

		/* far jump into the 16-bit code segment */
		ljmp *(jump_vector-relocater) (%esi)
/*
 * relocater.3: 16-bit protected mode.
 * Load the 16-bit data selector into every data segment register,
 * reload the IDT register, clear CR0.PE and far-jump to relocater.4.
 */
relocater.3:
		.code16

		movw	$SEL_RDATA, %ax
		movw	%ax, %ds
		movw	%ax, %es
		movw	%ax, %fs
		movw	%ax, %gs
		movw	%ax, %ss
		lidt	(idt-relocater) (%esi)
		/* jump_vector = 0 : run-time address of relocater.4 */
		lea	(relocater.4-relocater)(%esi), %eax
		movl	%eax, (jump_vector-relocater) (%esi)
		xorl	%eax, %eax
		movl	%eax, (jump_vector-relocater+4) (%esi)
		/* clear PE */
		movl	%cr0, %eax
		/*
		 * PE is bit 0 of CR0 and is set here (we are in protected
		 * mode), so decrementing %al clears exactly that bit.
		 */
		dec	%al
		movl	%eax, %cr0
		/*
		 * NOTE(review): this ljmp is assembled under .code16 with no
		 * operand-size suffix, so it presumably reads a 16:16 pointer:
		 * offset from the low word of jump_vector and segment from its
		 * high word. That high word is zero only while the trampoline
		 * sits below 64K - confirm.
		 */
		ljmp *(jump_vector-relocater) (%esi)
/*
 * relocater.4: first code executed after CR0.PE has been cleared.
 * Zero all data segment registers, then re-initialise both 8259
 * interrupt controllers with the vector bases held in %bx (low byte
 * for the master, high byte for the slave), preserving the interrupt
 * mask registers across the re-initialisation.
 */
relocater.4:
		xorw	%ax, %ax
		movw	%ax, %ds
		movw	%ax, %es
		movw	%ax, %fs
		movw	%ax, %gs
		movw	%ax, %ss
		/*
		 * set real mode irq offsets
		 */
		/* %bl = 0x08 is sent as the master ICW2, %bh = 0x70 as the slave ICW2 */
		movw	$0x7008,%bx
		in $0x21,%al			# Save master
		push %ax			#  IMR
		in $0xa1,%al			# Save slave
		push %ax			#  IMR
		movb $0x11,%al			# ICW1 to
		outb %al,$0x20			#  master,
		outb %al,$0xa0			#  slave
		movb %bl,%al			# ICW2 to
		outb %al,$0x21			#  master
		movb %bh,%al			# ICW2 to
		outb %al,$0xa1			#  slave
		movb $0x4,%al			# ICW3 to
		outb %al,$0x21			#  master
		movb $0x2,%al			# ICW3 to
		outb %al,$0xa1			#  slave
		movb $0x1,%al			# ICW4 to
		outb %al,$0x21			#  master,
		outb %al,$0xa1			#  slave
		/* masks come back in reverse order of the pushes above */
		pop %ax				# Restore slave
		outb %al,$0xa1			#  IMR
		pop %ax				# Restore master
		outb %al,$0x21			#  IMR
						# done
		/*
		 * Should A20 be left enabled?
		 */
		/*
		 * The opcode byte 0xb8 followed by the 16-bit word at
		 * relocator_a20_enabled forms "movw $imm16, %ax" in 16-bit
		 * code. The word is exported, presumably so the loader can
		 * store the desired setting before the trampoline runs -
		 * confirm against the caller. Non-zero means leave A20 alone.
		 */
		/* movw imm16, %ax */
		.byte	0xb8
		.globl	relocator_a20_enabled
relocator_a20_enabled:
		.word	0
		test	%ax, %ax
		jnz	a20_done

		/* temporary stack at 0000:0a00 (%ss is 0 here) for the calls below */
		movw	$0xa00, %ax
		movw	%ax, %sp
		movw	%ax, %bp

		/* Disable A20 */
		/* BIOS int 0x15 with %ax = 0x2400; its carry result is not trusted */
		movw	$0x2400, %ax
		int	$0x15
#		jnc	a20_done

		/* verify by probing memory; non-zero %al means A20 is still on */
		call	a20_check_state
		testb	%al, %al
		jz	a20_done

		/* fall back to I/O port 0x92: clear bits 0 and 1, write it back */
		inb	$0x92
		andb	$(~0x03), %al
		outb	$0x92
		jmp	a20_done
/*
 * a20_check_state: probe whether the A20 address line is still enabled.
 *
 * Real mode; must be entered with "call" on a usable stack.
 *
 * The byte at 0000:8000 (linear 0x8000) and the byte at ffff:8010
 * (linear 0x108000) refer to the same memory cell only when address
 * bit 20 is masked. The routine saves the low byte, writes a value that
 * differs from the current high byte into the low location, re-reads the
 * high byte and restores the low byte. If the high byte followed the
 * write, addresses wrap and A20 is disabled.
 *
 * The probe is repeated up to 100 times so that a disable request which
 * has just been issued gets time to take effect.
 *
 * Out:      %al = 0 if the wrap-around was observed (A20 disabled),
 *           %al != 0 if A20 still appears enabled after all probes.
 * Clobbers: %ax, %cx, %dx, %si, %di, %ds, %es, flags.
 *
 * The routine always leaves through "ret", so the return address pushed
 * by the caller is never left behind on the stack.
 */
a20_check_state:
		movw	$100, %cx		/* probe attempts */
1:
		xorw	%ax, %ax
		movw	%ax, %ds		/* %ds = 0x0000 */
		decw	%ax
		movw	%ax, %es		/* %es = 0xffff */
		movw	$0x8000, %ax
		movw	%ax, %si		/* %ds:%si -> linear 0x8000 */
		addw	$0x10, %ax
		movw	%ax, %di		/* %es:%di -> linear 0x108000 */
		movb	%ds:(%si), %dl		/* save the low byte */
		movb	%es:(%di), %al		/* current high byte */
		movb	%al, %dh
		decb	%dh			/* value that differs from %al */
		movb	%dh, %ds:(%si)		/* write it to the low location */
		outb	%al, $0x80		/* two dummy port writes, */
		outb	%al, $0x80		/*  used as a short delay */
		movb	%es:(%di), %dh		/* re-read the high byte */
		subb	%dh, %al		/* 1 if it followed the write, 0 if not */
		xorb	$1, %al			/* 0 = wrapped (A20 off), 1 = A20 on */
		movb	%dl, %ds:(%si)		/* restore the low byte */
		testb	%al, %al
		jz	2f			/* A20 is off: report it */
		loop	1b			/* still on: probe again */
2:
		ret
/*
 * a20_done: load the register state for the relocated code and jump to it.
 *
 * Every load below is a hand-encoded "mov $imm, reg": the opcode is
 * emitted with .byte and the immediate is a separate, exported
 * .word/.long, presumably so the loader can fill in the values before
 * the trampoline is executed - confirm against the caller. This is
 * 16-bit code, so the 32-bit loads carry the 0x66 operand-size prefix.
 */
a20_done:
		/*
		 * set up registers
		 */
		/* movw imm16, %ax. */
		.byte	0xb8
		.globl	relocator_ds
relocator_ds:	.word	0
		movw	%ax, %ds

		/* movw imm16, %ax. */
		.byte	0xb8
		.globl	relocator_es
relocator_es:	.word	0
		movw	%ax, %es

		/* movw imm16, %ax. */
		.byte	0xb8
		.globl	relocator_fs
relocator_fs:	.word	0
		movw	%ax, %fs

		/* movw imm16, %ax. */
		.byte	0xb8
		.globl	relocator_gs
relocator_gs:	.word	0
		movw	%ax, %gs

		/* movw imm16, %ax. */
		.byte	0xb8
		.globl	relocator_ss
relocator_ss:	.word	0
		movw	%ax, %ss

		/* movw imm16, %ax. */
		.byte	0xb8
		.globl	relocator_sp
relocator_sp:	.word	0
		movzwl	%ax, %esp		/* upper half of %esp is cleared */

		/* movl imm32, %eax (0x66 prefix + 0xb8); value is moved on to %esi */
		.byte	0x66, 0xb8
		.globl	relocator_esi
relocator_esi:	.long	0
		movl	%eax, %esi

		/* movl imm32, %edx (0x66 prefix + 0xba) */
		.byte	0x66, 0xba
		.globl	relocator_edx
relocator_edx:	.long	0

		/* movl imm32, %ebx (0x66 prefix + 0xbb) */
		.byte	0x66, 0xbb
		.globl	relocator_ebx
relocator_ebx:	.long	0

		/* movl imm32, %eax (0x66 prefix + 0xb8); loaded last among the %eax users */
		.byte	0x66, 0xb8
		.globl	relocator_eax
relocator_eax:	.long	0

		/* movl imm32, %ebp (0x66 prefix + 0xbd) */
		.byte	0x66, 0xbd
		.globl	relocator_ebp
relocator_ebp:	.long	0

		sti
		/* 0xea is a far jump with an immediate 16:16 pointer: ip word, then cs word */
		.byte 0xea			 /* ljmp */
		.globl relocator_ip
relocator_ip:
		.word 0
		.globl relocator_cs
relocator_cs:
		.word 0
/*
 * Data used by the trampoline: the far-jump pointer, the replacement GDT
 * that takes the machine away from BTX, the GDT descriptor and the IDT
 * descriptor.
 */
		.code32
		.p2align	4
/*
 * Far pointer used by every indirect ljmp above: offset first, selector
 * second. Both halves are rewritten at run time before each jump; the
 * selector is assembled as SEL_SCODE for the first one.
 */
jump_vector:	.long	0
		.long	SEL_SCODE

/*
 * Each entry is 8 bytes: limit 15:0, base 15:0, base 23:16, access byte,
 * flags + limit 19:16, base 31:24. All bases are 0.
 *   access 0x9a = present code segment, 0x92 = present writable data segment
 *   0xcf = 4K granularity, 32-bit, limit 0xfffff pages
 *   0x0f = byte granularity, 16-bit, limit 0xfffff bytes
 */
gdt:		.word 0x0, 0x0			/* null entry */
		.byte 0x0, 0x0, 0x0, 0x0
		.word 0xffff, 0x0		/* SEL_SCODE */
		.byte 0x0, 0x9a, 0xcf, 0x0
		.word 0xffff, 0x0		/* SEL_SDATA */
		.byte 0x0, 0x92, 0xcf, 0x0
		.word 0xffff, 0x0		/* SEL_RCODE */
		.byte 0x0, 0x9a, 0x0f, 0x0
		.word 0xffff, 0x0		/* SEL_RDATA */
		.byte 0x0, 0x92, 0x0f, 0x0
gdt.1:

/* operand for lgdt; gdtaddr is filled in at run time by relocater.1 */
gdtdesc:	.word gdt.1 - gdt - 1		/* limit */
gdtaddr:	.long 0				/* base */

/* operand for lidt: limit 0x3ff, base 0 */
idt:		.word 0x3ff
		.long 0
		/*
		 * Relocation table read by the copy loop: up to three
		 * { src, dest, size } triples followed by a zero dword.
		 * A triple whose src is 0 ends the list.
		 */
		.globl relocater_data
relocater_data:
		.long 0, 0, 0			/* src, dest, size */
		.long 0, 0, 0			/* src, dest, size */
		.long 0, 0, 0			/* src, dest, size */
		.long 0				/* end of list */

		/*
		 * Number of bytes from relocater up to this field, i.e. the
		 * trampoline code and data that precede it.
		 */
		.globl relocater_size
relocater_size:
		.long relocater_size-relocater
